## ============================================================================
## 00_setup.R
## ----------------------------------------------------------------------------
## Purpose:   Load packages, define helper functions, set output paths,
##            and load the primary survey dataset.
##
## Usage:     This script is sourced at the top of every other script in the
##            replication package. Set your working directory to the root of
##            the replication folder before running any script.
##
## Inputs:    combined_data.rds
## Outputs:   None (setup only)
##
## Required:  R >= 4.1.0
## ============================================================================

# Start from a clean slate: remove every (non-hidden) object in the current
# environment, then request a garbage collection.
# NOTE(review): this file is sourced at the top of the other scripts, so any
# object a calling script defines *before* sourcing it is removed here --
# confirm that every caller sources this file first. rm() does not detach
# packages or reset options().
rm(list = ls())
gc()

# ---- Packages ----------------------------------------------------------------

# Suppress startup messages for cleaner output
suppressMessages({

  # Core tidyverse and data manipulation
  library(tidyverse)
  library(lubridate)

  # Causal inference and estimation
  library(estimatr)       # lm_robust(), lm_lin()
  library(grf)            # Generalized Random Forests (causal forests)
  library(crossEstimation) # devtools::install_github("swager/crossEstimation")
  library(npcausal)       # devtools::install_github("ehkennedy/npcausal")
  library(autumn)         # devtools::install_github("aaronrudkin/autumn")
  library(lavaan)         # Confirmatory factor analysis (CFA)

  # Plotting
  library(ggthemes)       # theme_tufte()
  library(ggforce)        # facet_col()
  library(ggh4x)          # facet_grid2(), facetted_pos_scales()
  library(coefplot)
  library(lemon)          # coord_capped_cart(), brackets_horizontal()

  # Tables
  library(xtable)         # LaTeX table export

  # Text analysis (used in 04_si_s1_prevalence_expert.R)
  library(textclean)
  library(quanteda)
  library(quanteda.textstats)

})

# Additional packages called via :: (must be installed but not loaded):
#   psych           polychoric correlations, Cronbach's alpha
#   MBESS           ci.reliability() for McDonald's omega
#   broom           tidy() for model output formatting
#   margins         marginal effects for GLMs
#   scales          label_percent(), etc.

# ---- Output Paths ------------------------------------------------------------

# Output directories, given relative to the working directory (which should be
# the root of the replication folder). Both strings end in "/" so a file name
# can be appended directly, e.g. paste0(fig_dir, "<file name>").
# NOTE(review): nothing in this script creates these directories -- presumably
# they ship with the replication folder; confirm.
fig_dir <- "Figures/"
tab_dir <- "Tables/"

# ---- Helper Functions --------------------------------------------------------

#' Render numbers as strings with a fixed number of decimal places
#'
#' @param x       Values to format; coerced with as.numeric() first
#' @param digits  Number of decimal places to print (default 2)
#' @return Character vector, one formatted string per element of `x`
format_num <- function(x, digits = 2) {
  # Build a fixed-point format such as "%.2f" from the requested precision
  fmt <- paste0("%.", digits, "f")
  sprintf(fmt, as.numeric(x))
}

#' Wrap a number in parentheses with fixed decimal places
#'
#' @param x       Values to format; coerced with as.numeric() first
#' @param digits  Number of decimal places to print (default 2)
#' @return Character vector of the form "(1.23)", one string per element of
#'   `x`. Zero-length input yields character(0), matching format_num().
add_parens <- function(x, digits = 2) {
  x <- as.numeric(x)
  # Put the parentheses inside the sprintf() format instead of pasting them
  # on afterwards: paste0() recycles zero-length input to "" and would return
  # a stray "()" for empty `x`.
  sprintf(paste0("(%.", digits, "f)"), x)
}

#' Build an "estimate (se)" table cell
#'
#' @param est     Point estimate(s)
#' @param se      Standard error(s)
#' @param digits  Number of decimal places used for both parts (default 2)
#' @return Character vector: the formatted estimate, a single space, then the
#'   formatted standard error in parentheses
table_entry <- function(est, se, digits = 2) {
  estimate_txt <- format_num(est, digits = digits)
  se_txt <- add_parens(se, digits = digits)
  paste0(estimate_txt, " ", se_txt)
}

#' Format a table entry as "estimate (se)*" with significance star
#'
#' @param est     Point estimate(s)
#' @param se      Standard error(s)
#' @param p       P-value(s); entries with p < 0.05 receive a trailing "*"
#' @param digits  Number of decimal places for estimate and SE (default 2)
#' @return Character vector of "estimate (se)" strings, starred where
#'   p < 0.05. Entries whose p-value is NA are left unstarred.
make_entry <- function(est, se, p, digits = 2) {
  # Reuse the shared "estimate (se)" formatter so both helpers stay in sync
  entry <- table_entry(est, se, digits = digits)
  # Treat a missing p-value as "not significant": an NA in a logical subscript
  # makes the assignment below fail ("NAs are not allowed in subscripted
  # assignments") as soon as the replacement has more than one element.
  starred <- !is.na(p) & p < 0.05
  entry[starred] <- paste0(entry[starred], "*")
  entry
}

#' Rescale an outcome into reference-group SD units (Glass's delta scaling)
#'
#' Divides every element of `Y` by the standard deviation of `Y` among the
#' observations where `Z == reference`. The outcome is scaled only; it is not
#' centered. Missing values are dropped when computing that SD.
#'
#' @param Y     Numeric outcome vector
#' @param Z     Binary treatment indicator
#' @param reference  Value of Z indicating the reference (control) group
#' @return `Y` divided by the reference-group standard deviation
glass_delta <- function(Y, Z, reference) {
  in_reference <- Z == reference
  reference_sd <- sd(Y[in_reference], na.rm = TRUE)
  Y / reference_sd
}

#' Convert a vector of weights into two-decimal shares that add up to 1
#'
#' Each element is divided by the vector total and rounded to two decimals.
#' The last element is then overwritten with 1 minus the sum of the other
#' rounded shares, so it absorbs any rounding error (and is not itself
#' re-rounded).
#'
#' @param input_vector  Numeric vector of proportions or counts
#' @return Numeric vector of the same length as `input_vector`
normalize <- function(input_vector) {
  n <- length(input_vector)
  shares <- round(input_vector / sum(input_vector), 2)
  # Last share = whatever is left after the others, forcing the total to 1
  shares[n] <- 1 - sum(shares[-n])
  shares
}

# ---- Load Primary Dataset ----------------------------------------------------

# Read the primary survey dataset from the working directory. The file is
# checked for first so that a wrong working directory produces an actionable
# message rather than a low-level error from the reader.
if (!file.exists("combined_data.rds")) {
  stop(
    "Cannot find 'combined_data.rds' in ", getwd(), ". ",
    "Set the working directory to the root of the replication folder ",
    "before running this script.",
    call. = FALSE
  )
}
combined_dat <- read_rds("combined_data.rds")

# ---- Session Info ------------------------------------------------------------

# Print the R version and the current working directory for logging
# (full sessionInfo() is printed at end of each script)
cat("R version:", R.version.string, "\n")
cat("Working directory:", getwd(), "\n\n")
